*** BCG Energy data
* Bustein, Carvalho and Grassi

****************************** 1. Preamble ****************************************************************

* clean up 
	clear all 
	macro drop _all
	set max_memory 50g
	set matsize 11000
*set working directory. 
	cd "C:\Users\Public\Documents\BCG_DGM\BCG\replication_file_jan25_package\"


	

**************************** 2. Import data for each year ****************************************************************


*Prepare each waves
	forvalues year =2008/2020{
		**Establisment Level Data
			*Some information to see which year we are working on
			di "-------------------------- Importing year: `year' ----------------------"
			
			*Import a given year
			use data_deep/eacei/eacei`year', clear  

			*rename some variables
			capture rename SIRET siret
			
			capture rename POIDS poids
			

			*create siren from siret
			gen siren = substr(siret,1,9)
			
			gen year=`year'
			
			gen naf2d = substr(AR_APET,1,2)
			destring naf2d, replace

			*Check numbers of SIRET per SIREN
			*duplicates report siren

			*Rename variables about energy

				**Total value of purchase of energy
				rename FACTURE facture

				**Net Consumption of enrgy (in TEP = tons equivalent petrol)
				rename CONSNET consnet
				
				**Gross Consumption of enrgy (in TEP = tons equivalent petrol)
				rename CONSBRT consbrt
				
				**Total purchase value of elec
				*rename BR_VALA br_vala
				
				**Total Qty (in MWh) of elec
				*rename BR_QAUP br_qaup
			
			*replace very small values by zero
			
			replace consnet = 0 if consnet< 0.01
			replace consbrt = 0 if consbrt< 0.01
			
			/*
			winsor  consnet , p(.03) gen(consnet_w)
			winsor  consbrt , p(.03) gen(consbrt_w)
			
			drop consnet consbrt
			rename consnet_w consnet
			rename consbrt_w consbrt
			*/
			
			*Some Label
			label var facture "Toutes energies Valeur d'achat, keuros"
			label var consnet "Consommation totale nette, tep"
			label var consbrt "Consommation totale brute, tep"
			
			label var BR_VALA "Electricite Valeur d'achat keuros"
			label var BR_QAUP "Electricité Quantité achetée     "
			label var BR_PUMS "Electricité Puissance maximale souscrite"
			
			local listeofenergy BR_VALA BR_CSTP BR_QAUP CR_VALA CR_CSTP CR_QAUP DR_VALA DR_CSTP DR_QAUP ER_VALA ER_CSTP ER_QAUP FR_VALA FR_CSTP FR_QAUP GR_VALA GR_CSTP GR_QAUP HR_VALA HR_CSTP HR_QAUP IR_VALA IR_CSTP IR_QAUP JR_VALA JR_CSTP JR_QAUP KR_VALA KR_CSTP KR_QAUP LR_VALA LR_CSTP LR_QAUP
			
			keep facture consnet consbrt `listeofenergy' BR_PUMS year siret siren poids naf2d
			
			*total conso (tep) for item B-L
			egen consbrt2 = rowtotal(BR_CSTP  CR_CSTP  DR_CSTP   ER_CSTP   FR_CSTP   GR_CSTP   HR_CSTP   IR_CSTP   JR_CSTP   KR_CSTP   LR_CSTP  )
			
			*total purchase (tep) for item B-L
			egen facture2 = rowtotal( BR_VALA  CR_VALA  DR_VALA  ER_VALA  FR_VALA  GR_VALA  HR_VALA  IR_VALA  JR_VALA  KR_VALA  LR_VALA )
			
			
			*Save the current year at estab level
			save data/eacei/eacei`year'_establevel, replace
		
		*************************************************************************
		**Construct a dataset at the siren level
		
		***Some preparation
			encode siret, gen(siret_code)
			
			gen BR_PUMS_sum = BR_PUMS
			gen BR_PUMS_min = BR_PUMS
			gen BR_PUMS_max = BR_PUMS
		
		
		***The collapse command
			local listeofenergy BR_VALA BR_CSTP BR_QAUP CR_VALA CR_CSTP CR_QAUP DR_VALA DR_CSTP DR_QAUP ER_VALA ER_CSTP ER_QAUP FR_VALA FR_CSTP FR_QAUP GR_VALA GR_CSTP GR_QAUP HR_VALA HR_CSTP HR_QAUP IR_VALA IR_CSTP IR_QAUP JR_VALA JR_CSTP JR_QAUP KR_VALA KR_CSTP KR_QAUP LR_VALA LR_CSTP LR_QAUP
			
			collapse (sum) poids facture consnet consbrt consbrt2 facture2 `listeofenergy' BR_PUMS_sum (min) BR_PUMS_min (max) BR_PUMS_max (firstnm) naf2d (count) siret_code, by(siren)

			*generate the year again
			gen year = `year'

			
			*generate price of energy
			gen p_ene_brt = facture/consbrt
			gen p_ene_net = facture/consnet
			
			label var p_ene_brt "price (keuros) per tep = facture/consbrt"
			label var p_ene_net "price (keuros) per tep = facture/consnet"
			
			
			*generate price of elec
			gen p_elect = BR_VALA/BR_QAUP //price in MWh
			gen p_elect_TEP = BR_VALA/BR_CSTP //price in tep
			
			label var p_elect "price (keuros) per MWh = br_vala/br_qaup"
			label var p_elect_TEP "price (keuros) per tep = br_vala/BR_CSTP"
			
			*Take some log
			gen lp_ene_brt = log(p_ene_brt)
			gen lp_ene_net = log(p_ene_net)
			
			gen lp_elec = log(p_elect)
			gen lp_elec_TEP = log(p_elect_TEP)
			
			*generate price of energy
			foreach lettre in B C D E F G H I J K L{
				gen p_`lettre'R = `lettre'R_VALA/`lettre'R_QAUP //price in MWh
				gen p_`lettre'R_TEP = `lettre'R_VALA/`lettre'R_CSTP //price in tep
				
				gen lp_`lettre'R = log( p_`lettre'R)
				gen lp_`lettre'R_TEP = log( p_`lettre'R_TEP)
				
				label var p_`lettre'R "price (keuros) per item unit (= `lettre'r_vala/`lettre'r_qaup)"
				label var p_`lettre'R_TEP "price (keuros) per tep (= `lettre'r_vala/`lettre'r_cstp)"
			}
			
			
			*another price
			gen p_ene_brt2 = facture2/consbrt2
			gen lp_ene_brt2 = log(p_ene_brt2)
			
			label var p_ene_brt2 "price (keuros) per tep = facture2/consbrt2 (only using B-L energy types)"
			
			*generate share of each energy items and price
			foreach lettre in B C D E F G H I J K L{
				gen `lettre'R_share_cstp = `lettre'R_CSTP/consbrt2
				gen `lettre'R_share_vala = `lettre'R_VALA/facture2
				
				label var `lettre'R_share_cstp "share in total energy consumption (tep)"
				label var `lettre'R_share_vala "share in total energy purchase (keuros)"
				
			}
					
			*Some Label
			
			
			
			
			
			*Some summary stats
			describe
			su p_ene_brt p_ene_net p_elect facture consbrt consnet 
			
			su p_ene_brt p_ene_net p_elect, d

			*Save the current year
			save data/eacei/eacei`year'_firmlevel, replace
		
		di "-------------------------- Year: `year' Saved     ----------------------"
	}


**************************** 3. Construct Panel of Establisment ****************************************************************

*Concatene all waves at the establishement level
use data/eacei/eacei2008_establevel, clear  
	
forvalues year =2009/2020{
	append using data/eacei/eacei`year'_establevel
}
	

*Save the DataSET estab
save data/eacei_establevel, replace



**************************** 4. Construct Sector-Level Panel****************************************************************


*********4.1 compute sectorl-level panel from establishement data (total consumption, expenditure, and average price...)
	*Load the DataSET for estab
	use data/eacei_establevel, clear

	*Compute 2d sector-level values and qty (aggregating using weights for establishments)

	local listeofenergy BR_VALA BR_CSTP BR_QAUP CR_VALA CR_CSTP CR_QAUP DR_VALA DR_CSTP DR_QAUP ER_VALA ER_CSTP ER_QAUP FR_VALA FR_CSTP FR_QAUP GR_VALA GR_CSTP GR_QAUP HR_VALA HR_CSTP HR_QAUP IR_VALA IR_CSTP IR_QAUP JR_VALA JR_CSTP JR_QAUP KR_VALA KR_CSTP KR_QAUP LR_VALA LR_CSTP LR_QAUP

	collapse (sum) facture consnet consbrt facture2 consbrt2 `listeofenergy' [aweight = poids] , by(year naf2d)

	*Compute sector-level unit price for each energy type
	foreach lettre in B C D E F G H I J K L{
		gen p_`lettre'R_2d = `lettre'R_VALA/`lettre'R_QAUP //price in MWh
		gen p_`lettre'R_TEP_2d = `lettre'R_VALA/`lettre'R_CSTP //price in tep
		
		gen lp_`lettre'R_2d = log( p_`lettre'R_2d)
		gen lp_`lettre'R_TEP_2d = log( p_`lettre'R_TEP_2d)
		
		label var p_`lettre'R_2d "NACE2d price (keuros) per item unit (= `lettre'R_VALA/`lettre'R_QAUP)"
		label var p_`lettre'R_TEP_2d "NACE2d price (keuros) per tep (= `lettre'R_VALA/`lettre'R_CSTP)"
		
	}
	
	*Compute sector-level generic energy price
	gen p_ene_2d = facture/consbrt
	gen p_ene2_2d = facture2/consbrt2
	
	gen lp_ene_2d = log(p_ene_2d)
	gen lp_ene2_2d = log(p_ene2_2d)
	
	label var p_ene_2d "NACE2d price (keuros) per tep (=  facture/consbrt)"
	label var p_ene2_2d "NACE2d price (keuros) per tep (=  facture2/consbrt2)"


	*Save the DataSET sector-level share and things
	save data/eacei_sector2dlevel, replace	

	keep year naf2d p* lp*

	*Save the DataSET sector-level price
	save data/eacei_price_sector2dlevel, replace	
	
	
	
**********4.2 Compute the share of energy in materials

	**Compute the material at the sector-level
		***load the firm-level data ficusfare:
		use data/ficusfare_reduced_9419_sec2_v0222, clear 

		***drop some useless variables
		capture drop acha1 acha2 acha3 acha5 acha6 cogs cogs_newdef acha1R acha2R acha3R acha5R acha6R  invcorp catotal_tot
		capture drop sale v k m o s p q

		***destring the sector code
		destring naf2d, replace
			
		***select the firms sample
			****Keep firms with positive VA	
				keep if va>0

			****Keep firms with positive catotal and inputs
				/*
				gen sample_firm_inputs = 0
				replace sample_firm_inputs = 1 if (catotalR>0 & salR>0 & immocorR>0 & acha4R>0 & autachaR>0)
				label var sample_firm_inputs "=1 if (catotalR>0 & salR>0 & immocorR>0 & acha4R>0 & autachaR>0)"
				*/
				keep if sample_firm_inputs==1

			****Keep the relevent sectors
				keep if sample_sectors2==1 //should be no change
				keep if naf2d_num != 95 & naf2d_num != 70 //Two other sectors that we want to drop
				

			****Keep siren with more than one year
				bysort siren: egen count_siren=count(catotal)
				keep  if count_siren>1
				
		***Aggregate the intermediate input at the 2digit level	
			collapse (sum) acha4 catotal sal immocor autacha catotalR acha4R salR immocorR autachaR va , by(year naf2d)	
			
			
		***Save the sector-level panel dataset on income statement
			save data/ficusfare_sector2d_incomestatement, replace 
		
	**Merge with the nergy sector-level data and compute energy share in materials
		***Load the sector-level energy 
			use  data/eacei_sector2dlevel, clear	
			
		***Merge with the inputs
			merge 1:1 year naf2d using data/ficusfare_sector2d_incomestatement	
			keep if _merge==3
			drop _merge
			
		***Compute total energy share in material 
			gen ene_share = facture/acha4
			label var ene_share "Share of energy (facture) in acha4"
			
			gen ene_share2 = facture2/acha4
			label var ene_share2 "Share of energy (facture2) in acha4"
			

		***Compute energy type share in material	
			foreach lettre in B C D E F G H I J K L{
				gen ene_`lettre'_share = `lettre'R_VALA/acha4
				
				label var ene_`lettre'_share "Share of `lettre' energy in acha4"
				
			}
			
		***Some descriptive stats
			su ene_*
			
		***rename some variables
		foreach var in  acha4 catotal sal immocor autacha catotalR acha4R salR immocorR autachaR va {
			rename `var' `var'_naf2d
		}
		
		***Compute material price proxy
					
			****Compute material price proxy with all energy type
				foreach lettre in B C D E F G H I J K L{
					gen lPmat_hat_`lettre'R = ene_`lettre'_share * lp_`lettre'R_2d
					
					gen lPmat_hat_`lettre'R_TEP = ene_`lettre'_share * lp_`lettre'R_TEP_2d
				}
				
				local list_ene lPmat_hat_BR lPmat_hat_CR lPmat_hat_DR lPmat_hat_ER lPmat_hat_FR lPmat_hat_GR lPmat_hat_HR lPmat_hat_IR lPmat_hat_JR lPmat_hat_KR lPmat_hat_LR
				egen lPmat_hat = rowtotal( `list_ene' )
				
				local list_ene_TEP lPmat_hat_BR_TEP lPmat_hat_CR_TEP lPmat_hat_DR_TEP lPmat_hat_ER_TEP lPmat_hat_FR_TEP lPmat_hat_GR_TEP lPmat_hat_HR_TEP lPmat_hat_IR_TEP lPmat_hat_JR_TEP lPmat_hat_KR_TEP lPmat_hat_LR_TEP
				egen lPmat_hat_TEP = rowtotal( `list_ene_TEP' )
				
				label var lPmat_hat "material (log) price proxy using all item of energy"
				label var lPmat_hat_TEP "material (log) price proxy using all item of energy (in TEP)"
				
			****Compute material price proxy with generic energy
			gen lPmat_hat_gene = ene_share * lp_ene_2d
			gen lPmat_hat_gene2 = ene_share2 * lp_ene2_2d
			
				label var lPmat_hat_gene "material (log) price proxy using generic energy"
				label var lPmat_hat_gene2 "material (log) price proxy using generic energy with aggregation"
				
		***Compute material price proxy with lagged share
		xtset naf2d year
					
			****Compute material price proxy with all energy type
				foreach lettre in B C D E F G H I J K L{
					gen lPmat_hat_lag_`lettre'R = l.ene_`lettre'_share * lp_`lettre'R_2d
					
					gen lPmat_hat_lag_`lettre'R_TEP = l.ene_`lettre'_share * lp_`lettre'R_TEP_2d
				}
				
				local list_ene lPmat_hat_lag_BR lPmat_hat_lag_CR lPmat_hat_lag_DR lPmat_hat_lag_ER lPmat_hat_lag_FR lPmat_hat_lag_GR lPmat_hat_lag_HR lPmat_hat_lag_IR lPmat_hat_lag_JR lPmat_hat_lag_KR lPmat_hat_lag_LR
				egen lPmat_hat_lag = rowtotal( `list_ene' )
				
				local list_ene_TEP lPmat_hat_lag_BR_TEP lPmat_hat_lag_CR_TEP lPmat_hat_lag_DR_TEP lPmat_hat_lag_ER_TEP lPmat_hat_lag_FR_TEP lPmat_hat_lag_GR_TEP lPmat_hat_lag_HR_TEP lPmat_hat_lag_IR_TEP lPmat_hat_lag_JR_TEP lPmat_hat_lag_KR_TEP lPmat_hat_lag_LR_TEP
				egen lPmat_hat_lag_TEP = rowtotal( `list_ene_TEP' )
				
				label var lPmat_hat_lag "material (log) price proxy using all item of energy with lag share"
				label var lPmat_hat_lag_TEP "material (log) price proxy using all item of energy (in TEP) with lag share"
				
			****Compute material price proxy with generic energy
			gen lPmat_hat_lag_gene = L.ene_share * lp_ene_2d
			gen lPmat_hat_lag_gene2 = L.ene_share2 * lp_ene2_2d
			
				label var lPmat_hat_lag_gene "material (log) price proxy using generic energy with lag share"
				label var lPmat_hat_lag_gene2 "material (log) price proxy using generic energy with another aggregation with lag share"
						
				
		***Some stats
		su lPmat_hat* 
		
			*****without lag
			su lPmat_hat lPmat_hat_TEP lPmat_hat_gene lPmat_hat_gene2
			pwcorr lPmat_hat lPmat_hat_TEP lPmat_hat_gene lPmat_hat_gene2
		
			*****with lag
			su lPmat_hat_lag lPmat_hat_lag_TEP lPmat_hat_lag_gene lPmat_hat_lag_gene2
			pwcorr lPmat_hat_lag lPmat_hat_lag_TEP lPmat_hat_lag_gene lPmat_hat_lag_gene2
		
				
			
		***save the sector-level data
		save data/eacei_sector2dlevel,  replace	


